R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Working-directory setup is left disabled; cases.csv is read via a relative
# path, so run this from the directory that contains the file.
#dir.create("mers")
#setwd('~/./mers')

# Read the case line list. Strings are kept as character rather than factor so
# that column types do not depend on the R version's default, and so that the
# manual correction below can store a date string that is not already present
# in the column (assigning a new value into a factor would yield NA).
mers <- read.csv("cases.csv", stringsAsFactors = FALSE)
#INSTALL PACKAGES FIRST, THEN LOAD LIBRARY

# Manual correction: overwrite the hospitalisation date stored in row 890.
# NOTE(review): the row index is hard-coded and depends on the row order of
# cases.csv -- confirm it still points at the intended case.
mers$hospitalized[890] <- "2015-02-20"
head(mers)
##   number FT KSA_case code gender age country province  city district
## 1      1  2           25M      M  25  Jordan          Zarqa         
## 2      2              30M      M  30  Jordan          Zarqa         
## 3      3  1           40F      F  40  Jordan          Zarqa         
## 4      4              60M      M  60  Jordan          Zarqa         
## 5      5              29M      M  29  Jordan          Zarqa         
## 6      6              33M      M  33  Jordan          Zarqa         
##   prior_travel hospital exposure      onset hospitalized sampled reported
## 1                                2012-03-21   2012-04-04                 
## 2                                2012-03-30   2012-04-08                 
## 3                                2012-04-02   2012-04-09                 
## 4                                2012-04-02                              
## 5                                2012-04-11   2012-04-15                 
## 6                                2012-04-12   2012-04-14                 
##        death discharged comorbidity severity outcome    clinical
## 1 2012-04-25                           fatal   fatal       fatal
## 2                                        CCU            clinical
## 3 2012-04-19                           fatal   fatal       fatal
## 4                                                    subclinical
## 5                                        CCU            clinical
## 6                                        CCU            clinical
##   old_cluster cluster Cauchemez.cluster animal_contact camel_contact   HCW
## 1           A       A                 4          FALSE               FALSE
## 2           A       A                 4          FALSE                TRUE
## 3           A       A                 4          FALSE                TRUE
## 4           A       A                 4          FALSE                TRUE
## 5           A       A                 4                               TRUE
## 6           A       A                 4                               TRUE
##   contact_with            contact secondary suspected inferred    notes
## 1                                                           NA         
## 2            1 health care worker      TRUE      TRUE       NA probable
## 3            1 health care worker      TRUE                 NA         
## 4            1 health care worker      TRUE      TRUE       NA probable
## 5              health care worker      TRUE      TRUE       NA probable
## 6            1 health care worker      TRUE      TRUE       NA probable
##                                                                         citation
## 1 http://applications.emro.who.int/emhj/v19/Supp1/EMHJ_2013_19_Supp1_S12_S18.pdf
## 2 http://applications.emro.who.int/emhj/v19/Supp1/EMHJ_2013_19_Supp1_S12_S18.pdf
## 3 http://applications.emro.who.int/emhj/v19/Supp1/EMHJ_2013_19_Supp1_S12_S18.pdf
## 4 http://applications.emro.who.int/emhj/v19/Supp1/EMHJ_2013_19_Supp1_S12_S18.pdf
## 5 http://applications.emro.who.int/emhj/v19/Supp1/EMHJ_2013_19_Supp1_S12_S18.pdf
## 6 http://applications.emro.who.int/emhj/v19/Supp1/EMHJ_2013_19_Supp1_S12_S18.pdf
##   citation2 citation3 citation4 citation5       sequence accession patient
## 1                                                                        1
## 2                                                                        2
## 3                                         Jordan-N3_2012  KC776174       3
## 4                                                                        4
## 5                                                                        5
## 6                                                                        6
##   speculation  X                                         X.1
## 1             NA http://promedmail.org/direct.php?id=3587349
## 2             NA                                            
## 3             NA                                            
## 4             NA                                            
## 5             NA                                            
## 6             NA
# Remove row 471 from the data set, keeping every column.
# NOTE(review): presumably a bad record; the index is hard-coded -- confirm.
mers <- mers[-471, , drop = FALSE]
# One-time setup (run manually if the packages are missing):
# install.packages(c("lubridate", "ggplot2", "plotly"))
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.4.4
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.4
library(plotly)
## Warning: package 'plotly' was built under R version 3.4.4
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Parse the year-month-day strings for onset and hospitalisation into Dates.
mers$onset2 <- ymd(mers$onset)
mers$hospitalized2 <- ymd(mers$hospitalized)
## Warning: 5 failed to parse.
# Check the class of the parsed onset column.
class(mers$onset2)
## [1] "Date"
# Epidemic day: number of days between each onset date and the earliest one.
day0 <- min(mers$onset2, na.rm = TRUE)
mers$epi.day <- as.numeric(mers$onset2 - day0, units = "days")

# Case counts per epidemic day. The plot is stored as an object so that it can
# be reused when assembling the report, then rendered interactively.
ggplot1 <- ggplot(mers, aes(x = epi.day)) +
  geom_bar() +
  labs(
    x = "Epidemic day",
    y = "Case count",
    title = "Global count of MERS cases by date of symptom onset",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplotly(ggplot1)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## Warning: Removed 535 rows containing non-finite values (stat_count).
# Same daily case-count chart, with each bar split (stacked) by country.
ggplot2 <- ggplot(mers, aes(x = epi.day, fill = country)) +
  geom_bar() +
  labs(
    x = "Epidemic day",
    y = "Case count",
    title = "Global count of MERS cases by date of symptom onset",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplotly(ggplot2)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## Warning: Removed 535 rows containing non-finite values (stat_count).
## Warning: position_stack requires non-overlapping x intervals
#CHANGED Y AXIS MAX TO 15
# The y range is restricted with coord_cartesian() rather than ylim(): ylim()
# sets scale limits, which removes any bar segment that extends past the limit,
# whereas coord_cartesian() only zooms the view and keeps all the data.
ggplot3 <- ggplot(data = mers) +
  geom_bar(mapping = aes(x = epi.day, fill = country)) +
  coord_cartesian(ylim = c(0, 15)) +
  labs(
    x = "Epidemic day",
    y = "Case count",
    title = "Global count of MERS cases by date of symptom onset",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplotly(p = ggplot3)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## Warning: Removed 535 rows containing non-finite values (stat_count).

## Warning: position_stack requires non-overlapping x intervals
#CHANGED COLOUR PALETTE
# library() is used instead of require() so a missing package fails loudly.
library(RColorBrewer)
## Warning: package 'RColorBrewer' was built under R version 3.4.1
# The PRGn brewer palette provides at most 11 colours, which is fewer than the
# number of countries, so the 11 colours are interpolated to one per country.
prgn_fill <- colorRampPalette(brewer.pal(11, "PRGn"))(length(unique(mers$country)))
# The fill scale is stored in ggplot4 itself so that both the static plot and
# the interactive ggplotly() rendering use the changed palette.
ggplot4 <- ggplot(data = mers) +
  geom_bar(mapping = aes(x = epi.day, fill = country)) +
  scale_fill_manual(values = prgn_fill) +
  labs(
    x = "Epidemic day",
    y = "Case count",
    title = "Global count of MERS cases by date of symptom onset",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplot4
## Warning: Removed 535 rows containing non-finite values (stat_count).

## Warning: position_stack requires non-overlapping x intervals

ggplotly(p = ggplot4)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## Warning: Removed 535 rows containing non-finite values (stat_count).
## Warning: position_stack requires non-overlapping x intervals
# Raw infectious period: hospitalisation date minus symptom-onset date.
mers$infectious.period <- with(mers, hospitalized2 - onset2)

# Inspect the class of the result.
class(mers$infectious.period)
## [1] "difftime"
# "difftime" is R's class for time intervals/differences.

# Convert the difftime values to plain numbers, expressed in days.
mers[["infectious.period"]] <- as.numeric(mers[["infectious.period"]], units = "days")

# Histogram of the raw infectious period (negative values included).
ggplot5 <- ggplot(mers, aes(x = infectious.period)) +
  geom_histogram() +
  labs(
    x = "Infectious period",
    y = "Frequency",
    title = "Distribution of calculated MERS infectious period",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplotly(ggplot5)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 727 rows containing non-finite values (stat_bin).
# Same histogram with the x scale limited to the range -100 to 150.
ggplot6 <- ggplot(mers, aes(x = infectious.period)) +
  geom_histogram() +
  xlim(-100, 150) +
  labs(
    x = "Infectious period",
    y = "Frequency",
    title = "Distribution of calculated MERS infectious period",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplotly(ggplot6)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 740 rows containing non-finite values (stat_bin).
# Clamp the infectious period at zero: non-negative values are kept as they
# are, negative values become 0, and missing values stay NA. pmax() is the
# vectorised equivalent of ifelse(x < 0, 0, x).
mers$infectious.period2 <- pmax(mers$infectious.period, 0)

# Histogram of the clamped values. Negative periods are not dropped; they are
# counted in the bin at zero.
ggplot7 <- ggplot(data = mers) +
  geom_histogram(aes(x = infectious.period2)) +
  labs(
    x = "Infectious period",
    y = "Frequency",
    title = "Distribution of calculated MERS infectious period (positive values only)",
    # The caption previously read just "Data"; it now cites the source like
    # the other plots in this report.
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplotly(p = ggplot7)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 727 rows containing non-finite values (stat_bin).
#CHANGE PLOT TYPE TO DENSITY PLOT
# Kernel density estimate of the clamped infectious period. The y axis is
# labelled "Density" because geom_density() plots a density, not a count, and
# the caption URL has its hyphens restored (they had been garbled to "???").
ggplot8 <- ggplot(data = mers) +
  geom_density(aes(x = infectious.period2)) +
  labs(
    x = "Infectious period",
    y = "Density",
    title = "Probability density for MERS infectious period (positive values only)",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplotly(p = ggplot8)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## Warning: Removed 727 rows containing non-finite values (stat_density).
# Area plot of the clamped infectious period. geom_area() needs binned counts,
# so the "bin" stat is requested explicitly (discrete intervals).
# The caption URL has its hyphens restored (they had been garbled to "???").
ggplot9 <- ggplot(data = mers) +
  geom_area(stat = "bin", mapping = aes(x = infectious.period2)) +
  labs(
    x = "Infectious period",
    y = "Frequency",
    title = "Area plot for MERS infectious period (positive values only)",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplotly(p = ggplot9)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 727 rows containing non-finite values (stat_bin).
#NOW CHANGE TO DOT PLOT
# geom_dotplot() does its own binning and has no `stat` parameter, so the
# stat = "bin" argument (which only triggered an "unknown parameters" warning)
# is removed. The title now says "Dot plot" instead of the copied "Area plot",
# and the caption URL has its hyphens restored.
ggplot10 <- ggplot(data = mers) +
  geom_dotplot(mapping = aes(x = infectious.period2)) +
  labs(
    x = "Infectious period",
    y = "Frequency",
    title = "Dot plot for MERS infectious period (positive values only)",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplotly(p = ggplot10)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 727 rows containing non-finite values (stat_bindot).
# Infectious period against epidemic day, shown as a LOESS-smoothed trend line.
# The title previously read "Probability density ...", copied from the density
# plot; it now describes what is drawn. The caption URL has its hyphens
# restored (they had been garbled to "???").
ggplot11 <- ggplot(data = mers) +
  geom_smooth(aes(x = epi.day, y = infectious.period2), method = "loess") +
  labs(
    x = "Day",
    y = "Infectious Period",
    title = "MERS infectious period (positive values only) over time",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplotly(p = ggplot11)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## Warning: Removed 727 rows containing non-finite values (stat_smooth).
# Scatter plot of infectious period against epidemic day, coloured by country.
#I HAVE ALSO CHANGED THE Y AXIS MAX TO 40
# The title previously read "Probability density ...", copied from the density
# plot; it now describes what is drawn. The caption URL has its hyphens
# restored (they had been garbled to "???").
ggplot12 <- ggplot(data = mers) +
  geom_point(aes(x = epi.day, y = infectious.period2, colour = country)) +
  ylim(0, 40) +
  labs(
    x = "Day",
    y = "Infectious Period",
    title = "MERS infectious period (positive values only) over time",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplotly(p = ggplot12)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
#FACETING THE ABOVE GRAPH
# One panel per country, y scale limited to 0-50.
# The caption URL has its hyphens restored (they had been garbled to "???").
ggplot13 <- ggplot(data = mers, mapping = aes(x = epi.day, y = infectious.period2)) +
  geom_point(mapping = aes(colour = country)) +
  facet_wrap(~ country) +
  scale_y_continuous(limits = c(0, 50)) +
  labs(
    x = "Epidemic day",
    y = "Infectious Period",
    title = "MERS infectious period (positive values only) over time",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplotly(p = ggplot13)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
#ONLY GRAPH CERTAIN COUNTRIES (FACETING OF THE GRAPHS ALSO)
# Keeps only rows with gender M or F from the listed countries, then facets
# with gender in rows and country in columns; y scale limited to 0-50.
# The caption URL has its hyphens restored (they had been garbled to "???").
ggplot14 <- ggplot(
  data = subset(
    mers,
    gender %in% c("M", "F") &
      country %in% c("KSA", "Oman", "Iran", "Jordan", "Qatar", "South Korea", "UAE")
  )
) +
  geom_point(mapping = aes(x = epi.day, y = infectious.period2, colour = country)) +
  facet_grid(gender ~ country) +
  scale_y_continuous(limits = c(0, 50)) +
  labs(
    x = "Epidemic day",
    y = "Infectious Period",
    title = "MERS infectious period by gender and country",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplotly(p = ggplot14)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
#DATA EXPLORATION
# Infectious period by gender (rows) and country (columns) for rows with gender
# M or F from the listed countries; y scale limited to 0-50.
# NOTE(review): this plot uses the same data, layers and labels as ggplot14 --
# confirm whether a different exploration was intended here.
# The caption URL has its hyphens restored (they had been garbled to "???").
ggplot15 <- ggplot(
  data = subset(
    mers,
    gender %in% c("M", "F") &
      country %in% c("KSA", "Oman", "Iran", "Jordan", "Qatar", "South Korea", "UAE")
  )
) +
  geom_point(mapping = aes(x = epi.day, y = infectious.period2, colour = country)) +
  facet_grid(gender ~ country) +
  scale_y_continuous(limits = c(0, 50)) +
  labs(
    x = "Epidemic day",
    y = "Infectious Period",
    title = "MERS infectious period by gender and country",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
ggplotly(p = ggplot15)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
#CALCULATE CASE FATALITY IN MERS DATASET
#TODO: the case-fatality calculation has not been written yet; no code follows this heading.

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.